## Define function that recodes to numeric, but watches out to coercion to not introduce NAs
colstonumeric <- function(df){
tryCatch({
df_num <- as.data.frame(
lapply(df,
function(x) { as.numeric(as.character(x))}))
},warning = function(stop_on_warning) {
message("Stoped the execution of numeric conversion: ", conditionMessage(stop_on_warning))
})
}
##
## Define function that reverse codes items
ReverseCode <- function(df, tonumeric = FALSE, min = NULL, max = NULL) {
if(tonumeric) df <- colstonumeric(df)
df <- (max + min) - df
}
##
## Define function that scores only rows with less than 10% NAs (returns NA if all or above threshold percentage of rows are NA); can reverse code if vector of column indexes and min, max are provided.
ScoreLikert <- function(df, napercent = .1, tonumeric = FALSE, reversecols = NULL, min = NULL, max = NULL, engine = "sum") {
reverse_list <- list(reversecols = reversecols, min = min, max = max)
reverse_check <- !sapply(reverse_list, is.null)
# Recode to numeric, but watch out to coercion to not introduce NAs
colstonumeric <- function(df){
tryCatch({
df_num <- as.data.frame(
lapply(df,
function(x) { as.numeric(as.character(x))}))
},warning = function(stop_on_warning) {
message("Stoped the execution of numeric conversion: ", conditionMessage(stop_on_warning))
})
}
if(tonumeric) df <- colstonumeric(df)
if(all(reverse_check)){
df[ ,reversecols] <- (max + min) - df[ ,reversecols]
}else if(any(reverse_check)){
stop("Insuficient info for reversing. Please provide: ", paste(names(reverse_list)[!reverse_check], collapse = ", "))
}
if(engine == "sum") {
return(
ifelse(rowSums(is.na(df)) > ncol(df) * napercent,
NA,
rowSums(df, na.rm = TRUE) * NA ^ (rowSums(!is.na(df)) == 0)
)
)
}
if(engine == "mean") {
return(
ifelse(rowMeans(is.na(df)) > ncol(df) * napercent,
NA,
rowMeans(df, na.rm = TRUE) * NA ^ (rowSums(!is.na(df)) == 0)
)
)
}
if(engine == "mean_na") {
df[is.na(df)] <- 0
rowMeans(df)
}
}folder <- here::here("Rsyntax&data")
data_name <- "survey_686732_R_data_file.csv"
script_name <- "survey_686732_R_syntax_file.R"
# Check most recent .csv file
last_csv_file <-
dir(folder, pattern = ".*csv", full.names = TRUE) %>%
file.info() %>%
dplyr::arrange(dplyr::desc(ctime)) %>%
dplyr::slice(1) %>%
row.names()
if(identical(last_csv_file, file.path(folder, data_name))) {
cat("Most recent .csv is used.")
} else {
cat("NOT using the most recent .csv!")
}Most recent .csv is used.
# -------------------------------------------------------------------------
# Read data
library(limonaid)
library(sticky) # need this for sticky labels
df <- limonaid::ls_import_data(
datafile = file.path(folder, data_name),
scriptfile = file.path(folder, script_name),
massConvertToNumeric = FALSE
)
df_compl <-
df %>%
filter(lastpage == 17)
# -------------------------------------------------------------------------
# Labels to factor levels levels ("label" = question text; "labels" = response options text)
# library(labelled)
# library(sjlabelled)
# sjlabelled::get_labels(df$G01Q59_SQ008, attr.only = TRUE, values = "as.prefix")
# sjlabelled::get_values(df$G01Q59_SQ008)
# sjlabelled::as_label(df$G01Q59_SQ008, prefix = TRUE, keep.labels = TRUE)
# sjlabelled::as_character(df$G01Q59_SQ008, prefix = TRUE, keep.labels = TRUE)
# labelled::var_label(df$G01Q59_SQ008)
# labelled::to_factor(df$G01Q59_SQ008, levels = "values")
lime_label_recode <- function (x, prefix = FALSE) {
labels <- attr(x, "labels", exact = TRUE)
if (is.null(labels)) {
x
} else {
labels <- unname(labels)
values <- names(attr(x, "labels", exact = TRUE))
if (prefix) {
labels <- sprintf("[%s] %s", values, labels)
}
# No recoding solution preserve attributes, even with sticky
x_rec <- c(labels, x)[match(x, c(values, x))]
attributes(x_rec) <- attributes(x) # reattach attributes
x_rec
}
}
# test_df <- cbind(df$G02Q02_SQ021, lime_label_recode(df$G02Q02_SQ021))
# lime_label_recode(df$G01Q59_SQ008)
# lime_label_recode(df$G04Q05_SQ001)
# -------------------------------------------------------------------------
# Recode using labels
# cols_to_recode <- lapply(df, function(x) {!is.null(attr(x, "labels", exact = TRUE))})
# cols_to_recode <- which(unlist(cols_to_recode))
# df_recoded <- df
# list_recoded <- lapply(df_recoded[, cols_to_recode], lime_label_recode)
# df_recoded[, cols_to_recode] <- as.data.frame(do.call(cbind, list_recoded))
# df_recoded <-
# df %>%
# mutate(across(all_of(cols_to_recode), lime_label_recode))
df_recoded <-
df %>%
mutate(across(everything(), lime_label_recode)) %>% # some values have same labels: df$G01Q60_SQ006
mutate(across(where(is.character), function(col) iconv(col, to="UTF-8"))) # encoding: df_recoded$G01Q56# ------------------------------------------------------------------------------
# Define 3 scales
# ------------------------------------------------------------------------------
# ATSPPH - 10 items (likert 0-3) total sum
atspph_idx <- 184:193 # grep("G06Q13", names(df)); df[, grep("G06Q13", names(df), value = TRUE)]
atspph_labs <- unique(lapply(df[, atspph_idx], attr, "labels"))
atspph_rev <- c(2, 4, 8, 9, 10)
atspph_recode <- function(df, rev) {
df %>%
mutate(
across(everything(),
~ case_when(
. == "AO02" ~ 0,
. == "AO03" ~ 1,
. == "AO04" ~ 2,
. == "AO05" ~ 3
)
)
) %>%
mutate( # here reverse code
across(rev,
~ 3 - .x
)
)
} # atspph_recode(df_compl[, atspph_idx], atspph_rev)
# FSozU - 6 items (likert 1-5) total mean
fsozu_idx <- 222:227 # grep("G12Q45", names(df)); df[, grep("G12Q45", names(df), value = TRUE)]
fsozu_labs <- unique(lapply(df[, fsozu_idx], attr, "labels"))
fsozu_recode <- function(df) {
df %>%
mutate(
across(everything(),
~ case_when(
. == "AO01" ~ 1,
. == "AO02" ~ 2,
. == "AO03" ~ 3,
. == "AO04" ~ 4,
. == "AO05" ~ 5
)
)
)
} # fsozu_recode(df_compl[, fsozu_idx])
# PMHSS - 24 items (likert 1-5) subscale sum
pmhss_idx <- 228:251 # grep("G13Q46", names(df)); df[, grep("G13Q46", names(df), value = TRUE)]
pmhss_labs <- unique(lapply(df[, pmhss_idx], attr, "labels"))
pmhss_aware <- c(2, 4, 5, 6, 8, 10, 11, 12)
pmhss_agree <- c(14, 16, 17, 18, 20, 22, 23, 24)
pmhss_posit <- c(1, 3, 7, 9, 13, 15, 19, 21)
pmhss_recode <- function(df) {
df %>%
mutate(
across(everything(),
~ case_when(
. == "AO01" ~ 1,
. == "AO02" ~ 2,
. == "AO03" ~ 3,
. == "AO04" ~ 4,
. == "AO05" ~ 5
)
)
)
} # pmhss_recode(df_compl[, pmhss_idx])
# ------------------------------------------------------------------------------
# Recode & Score
df_compl[, atspph_idx] <- atspph_recode(df_compl[, atspph_idx], atspph_rev)
df_compl[, fsozu_idx] <- fsozu_recode(df_compl[, fsozu_idx])
df_compl[, pmhss_idx] <- pmhss_recode(df_compl[, pmhss_idx])
df_compl$help_seek <- ScoreLikert(df_compl[, atspph_idx], napercent = .5, engine = "sum")
df_compl$soc_supp <- ScoreLikert(df_compl[, fsozu_idx], napercent = .5, engine = "mean")
df_compl$aware <- ScoreLikert(df_compl[, pmhss_idx][pmhss_aware], napercent = .5, engine = "sum")
df_compl$agree <- ScoreLikert(df_compl[, pmhss_idx][pmhss_agree], napercent = .5, engine = "sum")
df_compl$posit <- ScoreLikert(df_compl[, pmhss_idx][pmhss_posit], napercent = .5, engine = "sum")NULL
# find_mod(df_scales)
# moderation_model_list #1,2,3,6,7,10,11,12
mod_synth <-
moderation_model_list %>%
purrr::pluck("Syntax") %>%
stringr::str_match("# Regressions\\\n(.*?)\\\n\\\n#") %>% # string between "# Regressions\n" and "\n\n#"
as.data.frame() %>%
dplyr::pull(2) %>%
stringr::str_remove_all(fixed("b0*1 + "))
mod_tabl <-
moderation_model_list %>%
purrr::pluck("Model")
for(i in seq_len(length(mod_tabl))) {print(mod_synth[i]); print(mod_tabl[[i]])}[1] "help_seek ~ b1*age + b2*aware + b3*ageaware"
[1] "help_seek ~ b1*age + b2*agree + b3*ageagree"
[1] "soc_supp ~ b1*agree + b2*aware + b3*agreeaware"
[1] "aware ~ b1*age + b2*help_seek + b3*agehelp_seek"
[1] "aware ~ b1*agree + b2*soc_supp + b3*agreesoc_supp"
[1] "aware ~ b1*posit + b2*soc_supp + b3*positsoc_supp"
[1] "agree ~ b1*age + b2*help_seek + b3*agehelp_seek"
[1] "agree ~ b1*posit + b2*soc_supp + b3*positsoc_supp"
[1] "agree ~ b1*posit + b2*aware + b3*positaware"
[1] "posit ~ b1*aware + b2*soc_supp + b3*awaresoc_supp"
[1] "posit ~ b1*agree + b2*soc_supp + b3*agreesoc_supp"
[1] "posit ~ b1*agree + b2*aware + b3*agreeaware"
[1] "age ~ b1*aware + b2*help_seek + b3*awarehelp_seek"
[1] "age ~ b1*agree + b2*help_seek + b3*agreehelp_seek"
[1] "age ~ b1*agree + b2*aware + b3*agreeaware"
# find_med(df_scales)
# mediation_model_list
for(i in seq_len(length(mediation_model_list$MedEs))) {print(mediation_model_list$MedEs[i]); print(mediation_model_list$PathEs[[i]])}$model_1
effect label est se lower upper z p pm
1 Indirect a × b 0.03977335 0.01721567 0.006031253 0.07351545 2.3102990 0.0208716 40.70041
2 Direct c -0.05794888 0.03647158 -0.129431877 0.01353411 -1.5888776 0.1120880 59.29959
3 Total c + a × b -0.01817553 0.03253100 -0.081935121 0.04558405 -0.5587143 0.5763567 100.00000
$model_2
effect label est se lower upper z p pm
1 Indirect a × b 0.02357673 0.01106423 0.001891225 0.045262225 2.130895 0.03309776 26.54287
2 Direct c -0.06524837 0.03687318 -0.137518477 0.007021738 -1.769535 0.07680471 73.45713
3 Total c + a × b -0.04167164 0.03555378 -0.111355766 0.028012478 -1.172074 0.24116750 100.00000
$model_3
effect label est se lower upper z p pm
1 Indirect a × b 0.0114499751 0.005176223 0.001304765 0.02159519 2.2120329 0.02696439 96.868121
2 Direct c 0.0003701934 0.008706562 -0.016694355 0.01743474 0.0425189 0.96608505 3.131879
3 Total c + a × b 0.0118201685 0.007074084 -0.002044780 0.02568512 1.6709116 0.09473913 100.000000
$model_4
effect label est se lower upper z p pm
1 Indirect a × b 0.021066430 0.004081768 0.013066311 0.029066549 5.161104 0.0000002454981 68.96143
2 Direct c -0.009481705 0.007772544 -0.024715611 0.005752201 -1.219897 0.2225038237341 31.03857
3 Total c + a × b 0.011584725 0.007085497 -0.002302595 0.025472045 1.634991 0.1020509156450 100.00000
$model_5
effect label est se lower upper z p pm
1 Indirect a × b 0.011921385 0.002903842 0.006229959 0.01761281 4.105383 0.00004036453 55.17638
2 Direct c 0.009684574 0.007859872 -0.005720493 0.02508964 1.232154 0.21789152443 44.82362
3 Total c + a × b 0.021605958 0.007732044 0.006451431 0.03676049 2.794340 0.00520057805 100.00000
$model_6
effect label est se lower upper z p pm
1 Indirect a × b 0.48006170 0.1744613 0.13812392 0.8219995 2.7516808 0.005929028 97.920426
2 Direct c 0.01019526 0.2397818 -0.45976836 0.4801589 0.0425189 0.966085047 2.079574
3 Total c + a × b 0.49025696 0.2934069 -0.08480994 1.0653239 1.6709116 0.094739131 100.000000
$model_7
effect label est se lower upper z p pm
1 Indirect a × b 0.1504592 0.02510330 0.1012576 0.1996608 5.993603 0.000000002052426 30.32096
2 Direct c 0.3457625 0.03731608 0.2726243 0.4189007 9.265776 0.000000000000000 69.67904
3 Total c + a × b 0.4962217 0.04228655 0.4133416 0.5791018 11.734739 0.000000000000000 100.00000
$model_8
effect label est se lower upper z p pm
1 Indirect a × b 0.802374 0.1533110 0.50189006 1.1028580 5.233638 0.0000001662062 71.15351
2 Direct c -0.325292 0.2666553 -0.84792675 0.1973427 -1.219897 0.2225038237341 28.84649
3 Total c + a × b 0.477082 0.2917949 -0.09482542 1.0489894 1.634991 0.1020509156450 100.00000
$model_9
effect label est se lower upper z p pm
1 Indirect a × b 0.1048390 0.01941788 0.06678061 0.1428973 5.399094 0.00000006697835 16.21354
2 Direct c 0.5417745 0.03899136 0.46535282 0.6181961 13.894731 0.00000000000000 83.78646
3 Total c + a × b 0.6466134 0.04051343 0.56720858 0.7260183 15.960472 0.00000000000000 100.00000
$model_10
effect label est se lower upper z p pm
1 Indirect a × b 0.26272501 0.02930484 0.2052886 0.32016144 8.9652432 0.00000000000000000 94.6022
2 Direct c 0.01499053 0.04004682 -0.0634998 0.09348086 0.3743251 0.70816247904824503 5.3978
3 Total c + a × b 0.27771554 0.04180285 0.1957835 0.35964762 6.6434599 0.00000000003064038 100.0000
$model_11
effect label est se lower upper z p pm
1 Indirect a × b 0.4161393 0.09870205 0.2226868 0.6095917 4.216116 0.0000248546 56.05467
2 Direct c 0.3262419 0.26477360 -0.1927048 0.8451886 1.232154 0.2178915244 43.94533
3 Total c + a × b 0.7423811 0.26567317 0.2216713 1.2630910 2.794340 0.0052005781 100.00000
$model_12
effect label est se lower upper z p pm
1 Indirect a × b 0.02964074 0.01203087 0.006060659 0.05322082 2.463723 0.01375023806726228 9.760248
2 Direct c 0.27404763 0.04482220 0.186197727 0.36189754 6.114104 0.00000000097100727 90.239752
3 Total c + a × b 0.30368837 0.04570911 0.214100160 0.39327658 6.643935 0.00000000003054157 100.000000
$model_13
effect label est se lower upper z p pm
1 Indirect a × b -0.1869264 0.09392011 -0.3710064 -0.00284633 -1.9902699 0.04656122 43.89865
2 Direct c 0.2388871 0.33008990 -0.4080773 0.88585138 0.7237030 0.46924808 56.10135
3 Total c + a × b 0.0519607 0.34025590 -0.6149286 0.71885001 0.1527107 0.87862645 100.00000
$model_14
effect label est se lower upper z p pm
1 Indirect a × b 0.28374330 0.03540910 0.21434274 0.3531438 8.0132884 0.000000000000001110223 93.579512
2 Direct c 0.01946762 0.05200726 -0.08246473 0.1214000 0.3743251 0.708162479048245474544 6.420488
3 Total c + a × b 0.30321092 0.04564051 0.21375716 0.3926647 6.6434599 0.000000000030640379123 100.000000
$model_15
effect label est se lower upper z p pm
1 Indirect a × b 0.2034478 0.08005067 0.04655136 0.3603442 2.541488 0.011038186582389 12.91465
2 Direct c 1.3718780 0.26273619 0.85692450 1.8868314 5.221504 0.000000177476211 87.08535
3 Total c + a × b 1.5753258 0.27059923 1.04496101 2.1056905 5.821619 0.000000005828013 100.00000
$model_16
effect label est se lower upper z p pm
1 Indirect a × b -0.0036200998 0.001736830 -0.007024223 -0.0002159762 -2.0843148 0.03713154 44.29214
2 Direct c 0.0045531340 0.006291440 -0.007777862 0.0168841295 0.7237030 0.46924808 55.70786
3 Total c + a × b 0.0009330342 0.006109817 -0.011041987 0.0129080555 0.1527107 0.87862645 100.00000
ggplot(df_scales, aes(aware, agree, color = posit)) +
geom_smooth(method = "loess", formula = y ~ x, se = TRUE, alpha = 0.1, color = "red", fill = "red") +
geom_point() +
scale_colour_distiller(palette = "Blues", direction = 1)
df_scales %>%
mutate(posit_cat = cut(posit,
breaks = c(5, 10, 20, 30, 40))
) %>%
ggplot(aes(aware, agree, color = posit_cat)) +
geom_point() -> plot_stigma1
plotly::ggplotly(plot_stigma1)
ggplot(df_scales, aes(posit, agree, color = aware)) +
geom_smooth(method = "loess", formula = y ~ x, se = TRUE, alpha = 0.1, color = "red", fill = "red") +
geom_point() +
scale_colour_distiller(palette = "Blues", direction = 1)
df_scales %>%
mutate(aware_cat = cut(posit,
breaks = c(5, 10, 20, 30, 40))
) %>%
ggplot(aes(posit, agree, color = aware_cat)) +
geom_point() -> plot_stigma2
plotly::ggplotly(plot_stigma2)psych::lowerMat(psych::partial.r(df_scales[, c("agree", "aware", "posit")])) agree aware posit
agree 1.00
aware 0.54 1.00
posit 0.02 0.39 1.00
mod_stigma_interac <- lm(agree ~ aware * posit, data = df_agree)
interactions::interact_plot(mod_stigma_interac, pred = posit, modx = aware)# interactions::sim_slopes(mod_stigma_interac, pred = posit, modx = aware)ggstatsplot::ggbetweenstats(df_agree, x = sex, y = agree)ggstatsplot::ggbetweenstats(df_agree, x = sex, y = aware)ggstatsplot::ggbetweenstats(df_agree, x = sex, y = posit)df_scales %>%
mutate(sex = as.numeric(as.factor(sex)) - 1) %>%
psych::mediate(posit ~ sex + aware:agree + (aware), data = .)
Mediation/Moderation Analysis
Call: psych::mediate(y = posit ~ sex + aware:agree + (aware), data = .)
The DV (Y) was posit . The IV (X) was sex agree aware*agree . The mediating variable(s) = aware .
Total effect(c) of sex on posit = -2.68 S.E. = 0.55 t = -4.85 df= 511 with p = 0.0000016
Direct effect (c') of sex on posit removing aware = -1.99 S.E. = 0.54 t = -3.71 df= 510 with p = 0.00023
Indirect effect (ab) of sex on posit through aware = -0.68
Mean bootstrapped indirect effect = -0.67 with standard error = 0.2 Lower CI = -1.08 Upper CI = -0.3
Total effect(c) of agree on posit = 0.3 S.E. = 0.04 t = 7.19 df= 511 with p = 0.0000000000023
Direct effect (c') of agree on posit removing aware = 0.08 S.E. = 0.05 t = 1.54 df= 510 with p = 0.12
Indirect effect (ab) of agree on posit through aware = 0.22
Mean bootstrapped indirect effect = 0.22 with standard error = 0.04 Lower CI = 0.13 Upper CI = 0.3
Total effect(c) of aware*agree on posit = -0.03 S.E. = 0 t = -5.77 df= 511 with p = 0.000000014
Direct effect (c') of aware*agree on posit removing aware = -0.01 S.E. = 0 t = -2.85 df= 510 with p = 0.0046
Indirect effect (ab) of aware*agree on posit through aware = -0.01
Mean bootstrapped indirect effect = -0.01 with standard error = 0 Lower CI = -0.02 Upper CI = -0.01
R = 0.5 R2 = 0.25 F = 43.54 on 4 and 510 DF p-value: 0.000000000000000000000000000000000000221
To see the longer output, specify short = FALSE in the print statement or ask for the summary
# psych::mediate(agree ~ posit + (aware), data = df_scales) # silly but works
# psych::mediate(agree ~ posit * aware, data = df_scales)df_agree <- na.omit(df_scales[, c("agree", "sex", "age", "resid", "aware", "soc_supp", "posit")])
mod_agree <- lm(agree ~ sex + age + resid + aware + soc_supp, data = df_agree)
best_mod_agree <- step(mod_agree, scope = help_seek ~ .^2, direction = "both", data = mod_agree$model, trace = 0) # BIC with k = log(nrow(mod_agree$model))
summary(best_mod_agree)
Call:
lm(formula = agree ~ sex + aware + soc_supp + sex:aware, data = df_agree)
Residuals:
Min 1Q Median 3Q Max
-22.6459 -2.9557 0.6675 3.2232 15.9948
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 7.31728 1.32803 5.510 0.0000000591 ***
sexMasculin -4.11805 1.53508 -2.683 0.00756 **
aware 0.42667 0.04509 9.463 < 0.0000000000000002 ***
soc_supp 0.52581 0.21332 2.465 0.01406 *
sexMasculin:aware 0.26271 0.06650 3.950 0.0000899131 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 5.174 on 472 degrees of freedom
Multiple R-squared: 0.3897, Adjusted R-squared: 0.3846
F-statistic: 75.35 on 4 and 472 DF, p-value: < 0.00000000000000022
df_helpseek <- na.omit(df_scales[, c("help_seek", "sex", "age", "agree", "aware", "soc_supp", "agree", "posit")])
mod_helpseek <- lm(help_seek ~ sex + age + agree + aware + soc_supp, data = df_helpseek)
best_mod_helpseek <- step(mod_helpseek, scope = help_seek ~ .^2, direction = "both", data = mod_helpseek$model, trace = 0) # BIC with k = log(nrow(mod_helpseek$model))
summary(best_mod_helpseek)
summary(lm(help_seek ~ sex + age * aware, data = df_scales))